import requests
import urllib3
from lxml import etree
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

# Browser-like User-Agent sent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

BASE_URL = 'https://lz.esf.fang.com'
FIRST_PAGE = 1
LAST_PAGE = 99  # inclusive; the crawl covers pages 1..99
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection blocks forever
OUTPUT_PATH = 'urls.txt'


def build_detail_urls(hrefs, channels):
    """Return absolute detail-page URLs for paired hrefs and channel values.

    Args:
        hrefs: Site-relative paths (each is appended directly to BASE_URL).
        channels: Channel values, positionally matched with ``hrefs``.

    Returns:
        A list of ``BASE_URL + href + '?channel=' + channel`` strings, one per
        (href, channel) pair. Pairing stops at the shorter input.
    """
    return [
        f'{BASE_URL}{href}?channel={channel}'
        for href, channel in zip(hrefs, channels)
    ]


def fetch_html(url):
    """GET ``url`` and return the response body parsed by ``etree.HTML``.

    NOTE(review): certificate verification is disabled (``verify=False``),
    as in the original script; confirm this is still required.
    """
    response = requests.get(
        url, headers=headers, verify=False, timeout=REQUEST_TIMEOUT
    )
    return etree.HTML(response.text)


def collect_page_urls(page):
    """Return the detail-page URLs found for listing page number ``page``.

    The first response is only used to extract a link, which is then fetched
    to obtain the actual listing markup (presumably an interstitial/redirect
    page; verify against the live site). Returns an empty list when that link
    is missing instead of raising ``IndexError``.
    """
    html = fetch_html(f'{BASE_URL}/house/i3{page}/')
    redirect = html.xpath('//*[@id="kesfqbfylb_A01_01_03"]/dd[1]/h4/a/@href')
    if not redirect:
        print(f'第{page}页未找到跳转链接，已跳过')
        return []

    html = fetch_html(redirect[0])
    # Select only anchors carrying BOTH attributes so that hrefs and channels
    # cannot drift out of alignment when one anchor lacks data_channel.
    anchors = html.xpath(
        '//div[@class="shop_list shop_list_4"]/dl/dt/a[@href and @data_channel]'
    )
    hrefs = [anchor.get('href') for anchor in anchors]
    channels = [anchor.get('data_channel') for anchor in anchors]
    return build_detail_urls(hrefs, channels)


def main():
    """Crawl every listing page and append the collected URLs to OUTPUT_PATH."""
    # verify=False would otherwise emit an InsecureRequestWarning per request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    house = []
    for i in range(FIRST_PAGE, LAST_PAGE + 1):
        print('--------------------------------')
        print(f'开始爬取第{i}页')
        house.extend(collect_page_urls(i))
    print('爬取结束！')

    # Append mode: results of earlier runs are kept. The context manager
    # guarantees the file is closed even if a write fails.
    with open(OUTPUT_PATH, 'a+', encoding='utf8') as f:
        f.writelines(url + '\n' for url in house)


if __name__ == '__main__':
    main()